county_train_stayhome <- read_feather('./data/county_train_stayhome.feather')
county_train_decrease <- read_feather('./data/county_train_decrease.feather')
stayhome_set <- county_train_stayhome %>% 
  distinct(fips, county, state, nchs, pop, threshold_day, 
           gt50, gt500, schools, restaurants, entertainment, stayhome,
           days_btwn_stayhome_thresh, days_btwn_decrease_thresh,
           decrease_50_total_visiting, decrease_50_recreation_visiting, decrease_50_school_visits, 
           decrease_40_total_visiting, decrease_40_recreation_visiting, decrease_40_school_visits, 
           decrease_on_stayhome, roll_decrease_on_stayhome, min_decrease)
decrease_set <- county_train_decrease %>% 
  distinct(fips, county, state, nchs, pop, threshold_day, 
           gt50, gt500, schools, restaurants, entertainment, stayhome, 
           days_btwn_stayhome_thresh, days_btwn_decrease_thresh,
           decrease_50_total_visiting, decrease_50_recreation_visiting, decrease_50_school_visits, 
           decrease_40_total_visiting, decrease_40_recreation_visiting, decrease_40_school_visits,  
           decrease_on_stayhome, roll_decrease_on_stayhome, min_decrease)
length(unique(c(stayhome_set$fips, decrease_set$fips)))
## [1] 440
county_set <- full_join(stayhome_set, decrease_set)
## Joining, by = c("fips", "county", "state", "pop", "stayhome", "gt50", "gt500", "schools", "restaurants", "entertainment", "decrease_40_recreation_visiting", "decrease_40_school_visits", "decrease_40_total_visiting", "decrease_50_recreation_visiting", "decrease_50_school_visits", "decrease_50_total_visiting", "nchs", "threshold_day", "days_btwn_stayhome_thresh", "days_btwn_decrease_thresh", "decrease_on_stayhome", "roll_decrease_on_stayhome", "min_decrease")
table(county_set$nchs)
## 
##   1   2   3   4   5   6 
##  57 120 121  67  52  23
length(setdiff(stayhome_set$fips, decrease_set$fips))
## [1] 84
length(setdiff(decrease_set$fips, stayhome_set$fips))
## [1] 20
nchs <- read_feather("./data/nchs.feather")
table(nchs$nchs)
## 
##    1    2    3    4    5    6 
##   68  368  373  358  641 1339
tot <- as.numeric(tapply(nchs$popul, nchs$nchs, sum, na.rm = TRUE))
set <- as.numeric(tapply(county_set$pop, county_set$nchs, sum, na.rm = TRUE))
set / tot
## [1] 0.97401860 0.62361152 0.63061366 0.31130822 0.12547190 0.03765281
set
## [1] 93305294 48322949 41288776  8999127  3414935   716641
intrv_labels = c("non-essential \n business closures", 
                 "gatherings of \n 50+ ban", 
                 "gatherings of \n 500+ ban", 
                 "restaurants \n closures", 
                 "schools closures", 
                 "stay-at-home \n orders")

p1 <- county_set %>% 
  select(fips, gt50, gt500, 
         schools, restaurants, entertainment, 
         stayhome) %>% 
  gather(key, value, -fips) %>% 
  mutate(key = factor(key, labels = intrv_labels), 
         title = "Array of non-pharmaceutical interventions") %>% 
  ggplot(aes(x = value, fill = key)) + 
  geom_bar() + 
  scale_x_date(name = "", limits = as.Date(c("2020-03-10", "2020-04-15"))) + 
  scale_y_continuous(limits = c(0,500)) +
  theme(axis.text.x =  element_blank(), 
        legend.key.width = unit(10, "pt")) + 
  labs(fill = "") +
  facet_wrap(~ title)
leg1 <- get_legend(p1 + theme(legend.box.margin = margin(0, 0, 0, 0)))
## Warning: Removed 20 rows containing non-finite values (stat_count).
## Warning: Removed 1 rows containing missing values (geom_bar).
p1
## Warning: Removed 20 rows containing non-finite values (stat_count).

## Warning: Removed 1 rows containing missing values (geom_bar).

intrv_labels = c("50% decrease in \n leisure visits", 
                 "50% decrease in \n education visits",
                 "50% decrease in \n total visits")

p2 <- county_set %>% 
  select(fips, 
         decrease_50_school_visits, 
         decrease_50_recreation_visiting,
         decrease_50_total_visiting) %>% 
  gather(key, value, -fips) %>% 
  mutate(key = factor(key, labels = intrv_labels), 
         title = "Array of mobility interventions") %>% 
  ggplot(aes(x = value, fill = key)) + 
  geom_bar() + 
  scale_x_date(name = "", limits = as.Date(c("2020-03-10", "2020-04-15"))) + 
  scale_y_continuous(limits = c(0,500)) + 
  theme(legend.key.width = unit(10, "pt")) + 
  labs(fill = "") +
  facet_wrap(~ title)
leg2 <- get_legend(p2 + theme(legend.box.margin = margin(0, 0, 0, 0)))
## Warning: Removed 83 rows containing non-finite values (stat_count).
p2
## Warning: Removed 83 rows containing non-finite values (stat_count).

prow1 <- plot_grid(p1 + theme(legend.position="none"), 
                   leg1,  ncol = 2, rel_widths = c(2,1))
## Warning: Removed 20 rows containing non-finite values (stat_count).
## Warning: Removed 1 rows containing missing values (geom_bar).
prow2 <- plot_grid(p2 + theme(legend.position="none"), 
                   leg2, ncol = 2, rel_widths = c(2,1))
## Warning: Removed 83 rows containing non-finite values (stat_count).
plot_grid(prow1, prow2, nrow = 2)

ggsave("./images/intrv_timeline.pdf", width = 5, height = 4)
summary(as.numeric(c(county_set$gt50 - county_set$stayhome, 
                     county_set$gt50 - county_set$stayhome)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -20.000 -12.000  -6.000  -7.339   0.000   0.000      36
summary(as.numeric(c(county_set$restaurants - county_set$stayhome, 
                     county_set$entertainment - county_set$stayhome)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -20.000 -13.000  -7.000  -8.563  -6.000  14.000      36
summary(as.numeric(c(county_set$schools - county_set$stayhome)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -22.00  -15.00   -8.00   -9.64   -6.00    0.00      18
summary(c(county_set$gt50, 
          county_set$gt500, 
          county_set$schools, 
          county_set$entertainment, 
          county_set$restaurants))
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-12" "2020-03-17" "2020-03-18" "2020-03-19" "2020-03-21" "2020-04-07" 
##         NA's 
##          "2"
summary(county_set$stayhome)
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-20" "2020-03-24" "2020-03-26" "2020-03-28" "2020-04-03" "2020-04-08" 
##         NA's 
##         "18"
tapply(county_set$stayhome, county_set$nchs, summary)
## $`1`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-20" "2020-03-23" "2020-03-25" "2020-03-25" "2020-03-27" "2020-04-07" 
##         NA's 
##          "1" 
## 
## $`2`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-20" "2020-03-24" "2020-03-25" "2020-03-27" "2020-04-01" "2020-04-07" 
##         NA's 
##          "1" 
## 
## $`3`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-20" "2020-03-24" "2020-03-27" "2020-03-29" "2020-04-03" "2020-04-08" 
##         NA's 
##          "7" 
## 
## $`4`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-20" "2020-03-24" "2020-03-25" "2020-03-28" "2020-04-04" "2020-04-08" 
##         NA's 
##          "3" 
## 
## $`5`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-23" "2020-03-25" "2020-03-31" "2020-03-29" "2020-04-04" "2020-04-07" 
##         NA's 
##          "4" 
## 
## $`6`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-23" "2020-03-25" "2020-03-31" "2020-03-31" "2020-04-05" "2020-04-08" 
##         NA's 
##          "2"
summary(county_set$decrease_50_total_visiting)
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-11" "2020-03-21" "2020-03-22" "2020-03-22" "2020-03-24" "2020-04-13" 
##         NA's 
##          "1"
tapply(county_set$decrease_50_total_visiting, county_set$nchs, summary)
## $`1`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-18" "2020-03-21" "2020-03-21" "2020-03-21" "2020-03-22" "2020-03-26" 
## 
## $`2`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-19" "2020-03-21" "2020-03-22" "2020-03-22" "2020-03-23" "2020-03-25" 
## 
## $`3`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-15" "2020-03-21" "2020-03-22" "2020-03-22" "2020-03-24" "2020-04-03" 
## 
## $`4`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-11" "2020-03-22" "2020-03-22" "2020-03-22" "2020-03-24" "2020-03-27" 
## 
## $`5`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-13" "2020-03-22" "2020-03-24" "2020-03-25" "2020-03-25" "2020-04-13" 
## 
## $`6`
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-03-14" "2020-03-21" "2020-03-23" "2020-03-23" "2020-03-25" "2020-04-09" 
##         NA's 
##          "1"
tapply(as.numeric(county_set$decrease_50_school_visits - county_set$schools), 
       county_set$nchs, summary)
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -25.000  -2.000   0.000  -1.368   1.000   3.000 
## 
## $`2`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -28.000  -4.250   0.000  -3.675   2.000   4.000 
## 
## $`3`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -26.000  -2.000   1.000  -1.308   2.000   7.000       1 
## 
## $`4`
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -25.0000  -1.0000   1.0000  -0.6154   2.0000   4.0000        2 
## 
## $`5`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -25.000  -6.000   0.000  -2.755   2.000   4.000       3 
## 
## $`6`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -15.0     0.0     1.0    -0.5     2.0     4.0       3
tapply(as.numeric(county_set$decrease_50_recreation_visiting - county_set$restaurants), county_set$nchs, summary)
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -5.000   3.000   4.000   4.018   6.000  10.000 
## 
## $`2`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -9.000   4.000   6.000   5.609   7.000  18.000       5 
## 
## $`3`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -13.00    4.00    6.00    5.13    7.00   12.00       6 
## 
## $`4`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -10.000   5.000   7.000   6.297   8.000  23.000       3 
## 
## $`5`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -5.00    6.00    8.50   11.09   15.00   50.00      20 
## 
## $`6`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -7.000   6.000   7.000   8.357  14.250  19.000       9
county_set %>% 
  group_by(schools, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, schools) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>% 
  ggplot() +
    geom_line(aes(x = schools, y = rel_n, col = nchs))
## `summarise()` has grouped output by 'schools'. You can override using the `.groups` argument.

county_set %>% 
  group_by(stayhome, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, stayhome) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n) %>% 
  ungroup() %>% 
  ggplot() +
    geom_line(aes(x = stayhome, y = rel_n, col = nchs))
## `summarise()` has grouped output by 'stayhome'. You can override using the `.groups` argument.
## Warning: Removed 6 row(s) containing missing values (geom_path).

county_set %>% 
  group_by(decrease_50_total_visiting, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, decrease_50_total_visiting) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n) %>% 
  ungroup() %>% 
  ggplot() +
    geom_line(aes(x = decrease_50_total_visiting, y = rel_n, col = nchs))
## `summarise()` has grouped output by 'decrease_50_total_visiting'. You can override using the `.groups` argument.
## Warning: Removed 1 row(s) containing missing values (geom_path).

county_set %>% 
   mutate(decrease_on_stayhome_ = 1 - decrease_on_stayhome) %>% 
  ggplot() + 
  geom_density(aes(x = decrease_on_stayhome_, col = nchs))
## Warning: Removed 18 rows containing non-finite values (stat_density).

county_set %>% 
  mutate(roll_decrease_on_stayhome_ = 1 - roll_decrease_on_stayhome) %>% 
  ggplot() + 
  geom_density(aes(x = roll_decrease_on_stayhome_, col = nchs))
## Warning: Removed 18 rows containing non-finite values (stat_density).

county_set %>% 
  mutate(min_decrease_ = 1 - min_decrease) %>% 
  ggplot() + 
  geom_density(aes(x = min_decrease_, col = nchs))
## Warning: Removed 18 rows containing non-finite values (stat_density).

county_set %>% 
   mutate(decrease_on_stayhome_ = 1 - decrease_on_stayhome) %>%
  group_by(nchs) %>% 
  summarise(mu = mean(decrease_on_stayhome_, na.rm = TRUE))
## # A tibble: 6 x 2
##   nchs     mu
## * <fct> <dbl>
## 1 1     0.697
## 2 2     0.703
## 3 3     0.705
## 4 4     0.690
## 5 5     0.658
## 6 6     0.664
plot_a <- county_set %>% 
   mutate(decrease_on_stayhome_ = 1 - decrease_on_stayhome) %>% 
  ggplot() + 
  geom_density(aes(x = decrease_on_stayhome_, 
                   col = nchs, 
                   fill = nchs, 
                   alpha = if_else(nchs %in% c(1,6), 1, 0)), 
               size = 1) + 
  scale_alpha_continuous(range = c(0,0.25)) +
  guides(alpha = FALSE) + 
  labs(x = "Relative mobility decrease on stayhome") + 
  theme_minimal_grid() +
  theme(axis.text.y = element_blank())
#ggsave("./images/decrease.pdf", height = 3, width = 4.5)
plot_a
## Warning: Removed 18 rows containing non-finite values (stat_density).

diff_decrease_ <- county_set %>% 
  mutate(diff_decrease = roll_decrease_on_stayhome - min_decrease) %>% 
  pull(diff_decrease) 
  
quantile(diff_decrease_, c(.75, .80, .9), na.rm = TRUE)
##       75%       80%       90% 
## 0.1053080 0.1186878 0.1765097
plot_b <- county_set %>% 
  mutate(diff_decrease = roll_decrease_on_stayhome - min_decrease, 
         q = ifelse(diff_decrease <= 0.12, "1", NA)) %>%
  ggplot() + 
  geom_histogram(aes(x = diff_decrease, 
                     fill = q), 
                     col = "white", size = 0.7, alpha = 0.6) + 
  #geom_vline(xintercept = 0.12, lty = 2) +
  guides(fill = FALSE, col = FALSE) + 
  labs(x = "Additional decrease after stayhome") + 
  theme_minimal_grid() +
  annotate("text", x=0.06, y=30, label= "80%", size = 7)
#ggsave("./images/min_decrease.pdf", height = 3, width = 4.5)
plot_b
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 18 rows containing non-finite values (stat_bin).

plot_grid(plot_a, plot_b, nrow = 1, rel_widths = c(1.2,1))
## Warning: Removed 18 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 18 rows containing non-finite values (stat_bin).

# ggsave("./images/decrease.pdf", width = 9, height = 3)
school_dist <- county_set %>% 
  arrange(schools) %>% 
  mutate(one = 1, 
         cum_n = cumsum(one), 
         rel_n = cum_n / n())

mob_dist <- county_set %>% 
  arrange(decrease_50_total_visiting) %>% 
  mutate(one = 1, 
         cum_n = cumsum(one), 
         rel_n = cum_n / n())

stay_dist <- county_set %>% 
  arrange(stayhome) %>% 
  mutate(one = 1, 
         cum_n = cumsum(one), 
         rel_n = cum_n / n())

county_set %>% 
  group_by(threshold_day, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, threshold_day) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>%
  ggplot() +
    geom_line(aes(x = threshold_day, y = rel_n, col = nchs), size = 1) +
    geom_line(data = school_dist, aes(x = schools, y = rel_n), size = 1) +
  geom_line(data = stay_dist, aes(x = stayhome, y = rel_n), size = 1) +
  geom_line(data = mob_dist, aes(x = decrease_50_total_visiting, y = rel_n), size = 1) + 
  labs(y="", x="a)") + 
  guides(col = FALSE) +
  theme_minimal_grid() + 
  theme(plot.margin = margin(l=-1))
## `summarise()` has grouped output by 'threshold_day'. You can override using the `.groups` argument.
## Warning: Removed 18 row(s) containing missing values (geom_path).
## Warning: Removed 1 row(s) containing missing values (geom_path).

county_set <- county_set %>% 
  mutate(days_btwn_decrease_stayhome = as.numeric(decrease_50_total_visiting - stayhome))

summary(county_set$days_btwn_decrease_stayhome)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -24.000 -10.000  -5.000  -5.836  -2.000  19.000      19
p1 <- county_set %>% 
  ggplot() + 
  geom_boxplot(aes(x = nchs, y = days_btwn_stayhome_thresh, fill = nchs)) + 
  guides(fill = FALSE) + 
  labs(y = "Stayhome-at-home")

p2 <- county_set %>% 
  ggplot() + 
  geom_boxplot(aes(x = nchs, y = days_btwn_decrease_thresh, fill = nchs)) + 
  guides(fill = FALSE) + 
  labs(y = "Mobility decrease")

plot_grid(p1, p2, nrow = 2)
## Warning: Removed 18 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

nchs_labels = c("Large central metro", 
                "Large fringe metro", 
                "Medium metro", 
                "Small metro", 
                "Micropolitan", 
                "None-core")
county_set %>% 
  group_by(threshold_day, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, threshold_day) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>%
  mutate(NCHS = factor(nchs, labels = nchs_labels)) %>% 
  ggplot() +
    geom_line(aes(x = threshold_day, y = rel_n, col = NCHS), size = 1) + 
  labs(y="", x="Threshold day") + 
  theme_minimal_grid() + 
  theme(plot.margin = margin(l=-1))
## `summarise()` has grouped output by 'threshold_day'. You can override using the `.groups` argument.

ggsave("./images/threshold.pdf", width = 6, height = 3)
xx <- county_set %>% 
  select(fips, nchs, days_btwn_stayhome_thresh, days_btwn_decrease_thresh) %>% 
  pivot_longer(cols = starts_with("days_btwn"), 
               names_to = "intrv", 
               names_prefix = "days_btwn_", 
               values_to = "days_btwn")
nchs_labels = c("Large central metro", 
                "Large fringe metro", 
                "Medium metro", 
                "Small metro", 
                "Micropolitan", 
                "None-core")
intrv_labels = c("Mobility \n Decrease", "Stayhome \n Orders")
xx %>% 
  mutate(intrv = factor(intrv, labels = intrv_labels), 
         nchs = factor(nchs, labels = nchs_labels)) %>% 
  ggplot(aes(y = days_btwn, x = intrv, fill = nchs)) + 
  geom_boxplot() +  
  facet_wrap(~ nchs, ncol = 6) + 
  guides(fill = FALSE) + 
  labs(x = "", y = "Days since threshold") + 
  theme_minimal_grid() 
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

ggsave("./images/days_since_thresh.pdf", width = 12, height = 4)
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
nchs_labels = c("Large \ncentral \nmetro", 
                "Large \nfringe \nmetro", 
                "Medium \nmetro", 
                "Small \nmetro", 
                "Micropolitan", 
                "None-core")
xx <- county_set %>%  
  mutate(days_btwn_stayhome_decrease = days_btwn_stayhome_thresh - days_btwn_decrease_thresh) 
xx %>% 
  mutate(nchs = factor(nchs, labels = nchs_labels)) %>% 
  ggplot(aes(y = days_btwn_stayhome_decrease, x = nchs, fill = nchs)) + 
  geom_boxplot() +
  guides(fill = FALSE) + 
  labs(x = "", y = "Days between stayhome \nand mobility decrease") + 
  theme_minimal_grid() 
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

#ggsave("./images/days_btwn_stayhome_decrease.pdf")
p1 <- county_set %>% 
  ggplot() + 
  geom_histogram(aes(x = days_btwn_stayhome_thresh, 
                     fill = nchs), 
                 position = "dodge", bins = 15, alpha = 0.75)

p2 <- county_set %>% 
  ggplot() + 
  geom_histogram(aes(x = days_btwn_decrease_thresh, 
                     fill = nchs), 
                 position = "dodge", bins = 15, alpha = 0.75) 

plot_grid(p1, p2, nrow = 2)
## Warning: Removed 18 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

county_set %>% 
  ggplot() + 
  geom_density(aes(x = days_btwn_stayhome_thresh, col = nchs))
## Warning: Removed 18 rows containing non-finite values (stat_density).

summary(county_set$days_btwn_stayhome_thresh)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -21.0000  -5.0000   0.0000  -0.0545   5.0000  29.0000       18
county_set %>% 
  ggplot() + 
  geom_density(aes(x = days_btwn_decrease_thresh, col = nchs)) 
## Warning: Removed 1 rows containing non-finite values (stat_density).

summary(county_set$days_btwn_decrease_thresh)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -30.000 -11.000  -5.000  -5.836  -1.000  20.000       1
county_set %>% 
  ggplot() + 
  geom_density(aes(x = days_btwn_decrease_stayhome, col = nchs))
## Warning: Removed 19 rows containing non-finite values (stat_density).

summary(stayhome_set$days_btwn_stayhome_thresh)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -21.00000  -5.00000   0.00000   0.01905   5.00000  29.00000
#summary(decrease_set$days_btwn_stayhome_thresh)
#summary(stayhome_set$days_btwn_decrease_thresh)
summary(decrease_set$days_btwn_decrease_thresh)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -21.000  -9.000  -4.000  -4.174   0.000  20.000
summary(county_set$days_btwn_decrease_stayhome)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -24.000 -10.000  -5.000  -5.836  -2.000  19.000      19
tapply(county_set$days_btwn_stayhome_thresh, county_set$nchs, median, na.rm = TRUE)
##  1  2  3  4  5  6 
##  5  0  0  0 -3 -5
tapply(county_set$days_btwn_decrease_thresh, county_set$nchs, median, na.rm = TRUE)
##   1   2   3   4   5   6 
##   0  -5  -6  -5 -10 -11
tapply(county_set$days_btwn_decrease_stayhome, county_set$nchs, median, na.rm = TRUE)
##  1  2  3  4  5  6 
## -4 -4 -6 -4 -6 -7
tapply(stayhome_set$days_btwn_stayhome_thresh, stayhome_set$nchs, quantile, 0.25)
##     1     2     3     4     5     6 
## -0.25 -5.00 -5.00 -5.75 -7.00 -8.00
tapply(decrease_set$days_btwn_decrease_thresh, decrease_set$nchs, quantile, 0.25)
##   1   2   3   4   5   6 
##  -4  -8  -9 -10 -12 -16
tapply(county_set$days_btwn_decrease_stayhome, county_set$nchs, quantile, 0.25, na.rm = TRUE)
##      1      2      3      4      5      6 
##  -5.25  -9.00 -11.00 -11.00 -10.25 -11.25
tapply(stayhome_set$days_btwn_stayhome_thresh, stayhome_set$nchs, quantile, 0.75)
##    1    2    3    4    5    6 
## 8.00 6.00 4.00 4.75 1.50 0.00
tapply(decrease_set$days_btwn_decrease_thresh, decrease_set$nchs, quantile, 0.75)
##     1     2     3     4     5     6 
##  5.00  0.75 -2.00 -1.00  0.00 -5.00
tapply(county_set$days_btwn_decrease_stayhome, county_set$nchs, quantile, 0.75, na.rm = TRUE)
##     1     2     3     4     5     6 
## -1.75 -2.00 -2.00 -2.00 -0.75 -3.75
plot_a <- county_set %>% 
  group_by(threshold_day, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, threshold_day) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>%
  ggplot() +
    geom_line(aes(x = threshold_day, y = rel_n, col = nchs), size = 1) + 
  labs(y="", x="a)") + 
  guides(col = FALSE) +
  theme_minimal_grid() + 
  theme(plot.margin = margin(l=-1))
## `summarise()` has grouped output by 'threshold_day'. You can override using the `.groups` argument.
plot_a

county_set %>% 
  group_by(nchs) %>% 
  summarise(mean(threshold_day))
## # A tibble: 6 x 2
##   nchs  `mean(threshold_day)`
## * <fct> <date>               
## 1 1     2020-03-21           
## 2 2     2020-03-27           
## 3 3     2020-03-28           
## 4 4     2020-03-29           
## 5 5     2020-04-02           
## 6 6     2020-04-04
plot_b <- county_set %>% 
  select(nchs, days_btwn_stayhome_thresh) %>% 
  filter(!is.na(days_btwn_stayhome_thresh)) %>% 
  group_by(days_btwn_stayhome_thresh, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, days_btwn_stayhome_thresh) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>% 
  ggplot() +
   geom_line(aes(x = days_btwn_stayhome_thresh, y = rel_n, col = nchs), size = 1) +
   scale_x_continuous(name = "b)",
                    breaks = c(-30,-20,-10,0,10,20,30),
                    limits = c(-35, 30)) +
   geom_vline(xintercept = 0, lty = 2, size = 0.75) +
  labs(y="") + 
  guides(col = FALSE) +
  theme_minimal_grid() + 
  theme(axis.text.y =  element_blank(), 
        plot.margin = margin(l=-1))  
## `summarise()` has grouped output by 'days_btwn_stayhome_thresh'. You can override using the `.groups` argument.
plot_b

plot_c <- county_set %>% 
  select(nchs, days_btwn_decrease_thresh) %>% 
  filter(!is.na(days_btwn_decrease_thresh)) %>% 
  group_by(days_btwn_decrease_thresh, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, days_btwn_decrease_thresh) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>% 
  ggplot() +
   geom_line(aes(x = days_btwn_decrease_thresh, y = rel_n, col = nchs), size = 1) +
   geom_vline(xintercept = 0, lty = 2, size = 0.75) +
   scale_x_continuous(name = "c)",
                    breaks = c(-30,-20,-10,0,10,20,30),
                    limits = c(-35, 30)) + 
  labs(y="") + 
  theme_minimal_grid() + 
  theme(axis.text.y =  element_blank(), 
        plot.margin = margin(l=-1))  
## `summarise()` has grouped output by 'days_btwn_decrease_thresh'. You can override using the `.groups` argument.
plot_c

plot_grid(plot_a, plot_b, plot_c, nrow = 1, rel_widths = c(1,1,1.15))

#ggsave("./images/timing.pdf", width = 9, height = 3)
county_set %>% 
  select(nchs, days_btwn_stayhome_thresh) %>% 
  filter(!is.na(days_btwn_stayhome_thresh)) %>% 
  group_by(days_btwn_stayhome_thresh, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, days_btwn_stayhome_thresh) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>% 
  filter(days_btwn_stayhome_thresh %in% 0)
## `summarise()` has grouped output by 'days_btwn_stayhome_thresh'. You can override using the `.groups` argument.
## # A tibble: 6 x 6
##   days_btwn_stayhome_thresh nchs      n sum_n cum_n rel_n
##                       <dbl> <fct> <int> <int> <int> <dbl>
## 1                         0 1         1    56    15 0.268
## 2                         0 2         9   119    63 0.529
## 3                         0 3         8   114    64 0.561
## 4                         0 4         4    64    34 0.531
## 5                         0 5         4    48    35 0.729
## 6                         0 6         1    21    16 0.762
county_set %>% 
  select(nchs, days_btwn_decrease_thresh) %>% 
  filter(!is.na(days_btwn_decrease_thresh)) %>% 
  group_by(days_btwn_decrease_thresh, nchs) %>% 
    summarise(n = n()) %>% 
  ungroup() %>% 
  arrange(nchs, days_btwn_decrease_thresh) %>% 
  group_by(nchs) %>% 
    mutate(sum_n = sum(n), 
           cum_n = cumsum(n), 
           rel_n = cum_n / sum_n
           ) %>% 
  ungroup() %>% 
  filter(nchs == 6)
## `summarise()` has grouped output by 'days_btwn_decrease_thresh'. You can override using the `.groups` argument.
## # A tibble: 14 x 6
##    days_btwn_decrease_thresh nchs      n sum_n cum_n  rel_n
##                        <dbl> <fct> <int> <int> <int>  <dbl>
##  1                       -27 6         1    22     1 0.0455
##  2                       -21 6         1    22     2 0.0909
##  3                       -19 6         1    22     3 0.136 
##  4                       -18 6         1    22     4 0.182 
##  5                       -16 6         3    22     7 0.318 
##  6                       -14 6         2    22     9 0.409 
##  7                       -12 6         1    22    10 0.455 
##  8                       -11 6         2    22    12 0.545 
##  9                        -9 6         3    22    15 0.682 
## 10                        -8 6         1    22    16 0.727 
## 11                        -7 6         1    22    17 0.773 
## 12                        -6 6         1    22    18 0.818 
## 13                        -5 6         3    22    21 0.955 
## 14                        -1 6         1    22    22 1

Looking for Examples to include in the six panel plot

county_set %>% 
  filter(days_btwn_decrease_stayhome <=  1,
         days_btwn_decrease_stayhome >= -1 
         ) %>% 
  select(county, state, nchs, days_btwn_decrease_stayhome) %>% 
  arrange(nchs)
## # A tibble: 65 x 4
##    county          state      nchs  days_btwn_decrease_stayhome
##    <chr>           <chr>      <fct>                       <dbl>
##  1 Alameda         California 1                               1
##  2 Orange          California 1                               1
##  3 San Francisco   California 1                               0
##  4 Santa Clara     California 1                               0
##  5 Cook            Illinois   1                               0
##  6 Erie            New York   1                              -1
##  7 queens county   New York   1                               1
##  8 richmond county New York   1                               0
##  9 Contra Costa    California 2                               1
## 10 Marin           California 2                               1
## # … with 55 more rows
county_set %>% 
  filter(state == "New York"
         ) %>% 
  select(county, state, nchs, days_btwn_decrease_stayhome, stayhome, decrease_50_total_visiting) %>% 
  arrange(nchs)
## # A tibble: 22 x 6
##    county     state   nchs  days_btwn_decrease_… stayhome   decrease_50_total_v…
##    <chr>      <chr>   <fct>                <dbl> <date>     <date>              
##  1 bronx cou… New Yo… 1                        3 2020-03-23 2020-03-26          
##  2 Erie       New Yo… 1                       -1 2020-03-23 2020-03-22          
##  3 kings cou… New Yo… 1                        3 2020-03-23 2020-03-26          
##  4 Monroe     New Yo… 1                       -2 2020-03-23 2020-03-21          
##  5 new york … New Yo… 1                       -3 2020-03-23 2020-03-20          
##  6 queens co… New Yo… 1                        1 2020-03-23 2020-03-24          
##  7 richmond … New Yo… 1                        0 2020-03-23 2020-03-23          
##  8 Dutchess   New Yo… 2                       -3 2020-03-23 2020-03-20          
##  9 Nassau     New Yo… 2                       -1 2020-03-23 2020-03-22          
## 10 Niagara    New Yo… 2                       -1 2020-03-23 2020-03-22          
## # … with 12 more rows
county_set %>% 
  filter(days_btwn_decrease_stayhome < 0) %>% 
  select(county, state, nchs, 
         days_btwn_decrease_stayhome, 
         stayhome, decrease_50_total_visiting) %>%
  arrange( nchs)
## # A tibble: 360 x 6
##    county     state      nchs  days_btwn_decrease… stayhome   decrease_50_total…
##    <chr>      <chr>      <fct>               <dbl> <date>     <date>            
##  1 Jefferson  Alabama    1                      -4 2020-03-27 2020-03-23        
##  2 Maricopa   Arizona    1                      -9 2020-04-01 2020-03-23        
##  3 Denver     Colorado   1                      -4 2020-03-25 2020-03-21        
##  4 District … District … 1                     -14 2020-04-02 2020-03-19        
##  5 Duval      Florida    1                      -2 2020-03-26 2020-03-24        
##  6 Hillsboro… Florida    1                      -6 2020-03-28 2020-03-22        
##  7 Miami-Dade Florida    1                      -4 2020-03-26 2020-03-22        
##  8 Orange     Florida    1                      -6 2020-03-27 2020-03-21        
##  9 Pinellas   Florida    1                      -2 2020-03-27 2020-03-25        
## 10 Fulton     Georgia    1                      -4 2020-03-25 2020-03-21        
## # … with 350 more rows
county_train_stayhome %>% 
  filter(date == stayhome) %>% 
  select(fips, nchs, date, stayhome, days_since_thresh)
## # A tibble: 221 x 5
##    fips  nchs  date       stayhome   days_since_thresh
##    <chr> <fct> <date>     <date>                 <dbl>
##  1 01017 5     2020-04-05 2020-04-05                 7
##  2 01055 4     2020-04-05 2020-04-05                 4
##  3 01081 4     2020-04-05 2020-04-05                 6
##  4 01093 6     2020-04-05 2020-04-05                 4
##  5 01097 3     2020-04-05 2020-04-05                 9
##  6 01101 3     2020-04-05 2020-04-05                 5
##  7 01117 2     2020-04-05 2020-04-05                 5
##  8 01123 6     2020-04-05 2020-04-05                 6
##  9 04005 4     2020-04-01 2020-04-01                 8
## 10 04013 1     2020-04-01 2020-04-01                10
## # … with 211 more rows